##########################################
# Replication Data for Proksch, Lowe, Wäckerle, Soroka. (2018). Multilingual Sentiment Analysis: A New Approach to Measuring Conflict in Legislative Speeches. Legislative Studies Quarterly, Forthcoming.
##########################################

#Part 5: Comparative Application: Sentiment in European Parliaments

##########################
# Intro
rm(list = ls())
require(quanteda)
require(tidyverse)
library(zoo)
library(ggplot2)
library(jtools)
library(dplyr)
library(plm)
library(stargazer)
library(margins)
library(sjPlot)
library(sjmisc)

library(rstudioapi)

# Run from the folder that contains this script so the relative data paths
# used below resolve. getActiveDocumentContext() only works inside RStudio;
# when the script is run elsewhere (Rscript, source() from a terminal) the
# current working directory is kept instead of failing.
if (rstudioapi::isAvailable()) {
  current_path <- getActiveDocumentContext()$path
  setwd(dirname(current_path))
} else {
  message("RStudio API not available; keeping working directory: ", getwd())
}

# Month index of a date: whole months elapsed since January 1900.
# `d` may be anything as.Date() accepts; numeric input is read as days since
# 1900-01-01. Vectorised over `d`.
monnb <- function(d) {
  date_parts <- as.POSIXlt(as.Date(d, origin = "1900-01-01"))
  # POSIXlt stores years since 1900 and zero-based months
  12 * date_parts$year + date_parts$mon
}

# Number of calendar-month boundaries crossed going from d1 to d2
# (day of month is ignored; negative when d2 is earlier than d1).
mondf <- function(d1, d2) {
  monnb(d2) - monnb(d1)
}
#https://stackoverflow.com/a/1995984

#download the CHES Dataset trendfile: https://www.chesdata.eu/1999-2014-chapel-hill-expert-survey-ches-trend-file
#Polk, Jonathan, Jan Rovny, Ryan Bakker, Erica Edwards, Liesbet Hooghe, Seth Jolly, Jelle Koedam, Filip Kostelka, Gary Marks, Gijs Schumacher, Marco Steenbergen, Milada Vachudova and Marko Zilovic. 2017. "Explaining the salience of anti-elitism and reducing political corruption for political parties in Europe with the 2014 Chapel Hill Expert Survey data," Research & Politics (January-March): 1-9. 
#and
#Ryan Bakker, Catherine de Vries, Erica Edwards, Liesbet Hooghe, Seth Jolly, Gary Marks, Jonathan Polk, Jan Rovny, Marco Steenbergen, and Milada Vachudova. 2015."Measuring party positions in Europe: The Chapel Hill expert survey trend file, 1999-2010." Party Politics 21.1: 143-152.

# Read the CHES trend file, keep the seven countries analysed below and the
# two positional scales, then replace verbal scale labels by the numeric
# value used for them here (center = 5, extreme left = 0, extreme right = 10).
# The replace() calls run in order, exactly like successive assignments.
ches <- read.csv("1999-2014_CHES_dataset_means.csv", stringsAsFactors = FALSE) %>%
  filter(country %in% c("esp", "fin", "cz", "ge", "nl", "uk", "sv")) %>%
  select(country, year, party, lrgen, galtan) %>%
  mutate(
    # general left-right scale
    lrgen = replace(lrgen, lrgen == "center", 5),
    lrgen = replace(lrgen, lrgen == "extreme left", 0),
    lrgen = replace(lrgen, lrgen == "extreme right", 10),
    # GAL-TAN scale
    galtan = replace(galtan, galtan == "center", 5)
  )

###############################################
# Spain
###############################################

# Spain: attach CHES party positions (own party and prime minister's party)
# to the sentiment data and compute positional distances to the PM party.
# The .RData file is expected to provide `senti_spain`.
load("5_spain_senti.RData")
# Calculate Sentiment: log ratio of the pos and neg columns; 0.5 is added to
# both so that zeros do not produce infinite values
senti_spain$Sentiment <- log((senti_spain$pos + 0.5) / (senti_spain$neg + 0.5))
# Extract the Spanish parties in the dataset
spanish_parties <- ches %>%
  filter(country == "esp") %>%
  filter(party %in% c("CC", "CHA", "CiU", "ERC", "IU", "PA", "PNV", "PP", "PSOE", "UPyD"))
# One row per party and year (party-major order); built from however many
# parties the filter returned instead of a hard-coded count
spanish_parties_expanded <- data.frame(
  year = rep(1999:2017, times = length(unique(spanish_parties$party))),
  party = rep(unique(spanish_parties$party), each = length(1999:2017)),
  stringsAsFactors = FALSE
)
spanish_parties_expanded <- left_join(spanish_parties_expanded, spanish_parties,
                                      by = c("year", "party"))
# Add in positional estimates from CHES Dataset: carry the last observed
# value forward to later years. Filling is done within each party so that a
# party without a value in its first years never inherits the preceding
# party's position (and a missing first row cannot fail).
spanish_parties_expanded <- spanish_parties_expanded %>%
  group_by(party) %>%
  tidyr::fill(lrgen, galtan, .direction = "down") %>%
  ungroup() %>%
  as.data.frame()
# No positions before these years for these parties (kept explicit)
spanish_parties_expanded$lrgen[spanish_parties_expanded$party == "UPyD" & spanish_parties_expanded$year < 2010] <- NA
spanish_parties_expanded$galtan[spanish_parties_expanded$party == "UPyD" & spanish_parties_expanded$year < 2010] <- NA
spanish_parties_expanded$lrgen[spanish_parties_expanded$party == "CHA" & spanish_parties_expanded$year < 2006] <- NA
spanish_parties_expanded$galtan[spanish_parties_expanded$party == "CHA" & spanish_parties_expanded$year < 2006] <- NA
spanish_parties_expanded$lrgen[spanish_parties_expanded$party == "PA" & spanish_parties_expanded$year < 2002] <- NA
spanish_parties_expanded$galtan[spanish_parties_expanded$party == "PA" & spanish_parties_expanded$year < 2002] <- NA

senti_spain$year <- as.integer(format(as.Date(senti_spain$date, format = "%Y-%m-%d"), "%Y"))
# Add in positional estimates to the sentiment measures
# (natural join: matches on all columns the two tables share)
senti_spain <- left_join(senti_spain, spanish_parties_expanded)
# Same table keyed by the prime minister's party, with *_pm column names
spanish_parties_expanded_pm <- rename(spanish_parties_expanded,
                                      Prime_Minister = party,
                                      lrgen_pm = lrgen,
                                      galtan_pm = galtan)
# Add in positional estimates of the prime minister party to the sentiment measures
# NOTE(review): this is also a natural join. After the join above both tables
# carry `country`, which is not forward-filled (NA outside CHES survey years),
# so it takes part in the match - confirm this is intended.
senti_spain <- left_join(senti_spain, spanish_parties_expanded_pm)
senti_spain <- senti_spain %>% filter(!is.na(lrgen))
senti_spain$lrgen <- as.numeric(senti_spain$lrgen)
senti_spain$lrgen_pm <- as.numeric(senti_spain$lrgen_pm)
senti_spain$galtan <- as.numeric(senti_spain$galtan)
senti_spain$galtan_pm <- as.numeric(senti_spain$galtan_pm)
# Absolute distance between a party and the prime minister's party
senti_spain$lrgen_dist_to_pm <- abs(senti_spain$lrgen - senti_spain$lrgen_pm)
senti_spain$galtan_dist_to_pm <- abs(senti_spain$galtan - senti_spain$galtan_pm)


###############################################
# Finland
###############################################

# Finland: attach CHES party positions (own party and prime minister's party)
# to the sentiment data and compute positional distances to the PM party.
# The .RData file is expected to provide `senti_finland`.
load("5_finland_senti.RData")
# Calculate Sentiment: log ratio of the pos and neg columns; 0.5 is added to
# both so that zeros do not produce infinite values
senti_finland$Sentiment <- log((senti_finland$pos + 0.5) / (senti_finland$neg + 0.5))
senti_finland$party[senti_finland$party == "RKP"] <- "RKP/SFP"
finnish_parties <- ches %>%
  filter(country == "fin") %>%
  filter(party %in% c("KD", "KESK", "KOK", "PS", "RKP/SFP", "SFP", "SDP", "SKL", "VAS", "VIHR"))
table(finnish_parties$party, finnish_parties$year)
table(senti_finland$party)
# Map CHES party labels onto the labels used in the sentiment data
finnish_parties$party <- recode(finnish_parties$party,
                                "KD" = "KD/SKL",
                                "KESK" = "Kesk",
                                "KOK" = "Kok",
                                "SFP" = "RKP/SFP",
                                "SKL" = "KD/SKL",
                                "VAS" = "Vas",
                                "VIHR" = "Vihreät")
table(finnish_parties$party, finnish_parties$year)

# One row per party and year (party-major order); built from however many
# distinct labels remain after recoding instead of a hard-coded count
finnish_parties_expanded <- data.frame(
  year = rep(1999:2017, times = length(unique(finnish_parties$party))),
  party = rep(unique(finnish_parties$party), each = length(1999:2017)),
  stringsAsFactors = FALSE
)
finnish_parties_expanded <- left_join(finnish_parties_expanded, finnish_parties,
                                      by = c("year", "party"))

# Add in positional estimates from CHES Dataset: carry the last observed
# value forward to later years. Filling is done within each party so that a
# party without a value in its first years never inherits the preceding
# party's position (and a missing first row cannot fail).
finnish_parties_expanded <- finnish_parties_expanded %>%
  group_by(party) %>%
  tidyr::fill(lrgen, galtan, .direction = "down") %>%
  ungroup() %>%
  as.data.frame()

senti_finland$year <- as.integer(format(as.Date(senti_finland$date, format = "%Y-%m-%d"), "%Y"))
str(senti_finland)
str(finnish_parties_expanded)
# Natural join: matches on all columns the two tables share
senti_finland <- left_join(senti_finland, finnish_parties_expanded)
senti_finland <- senti_finland %>% filter(!is.na(lrgen))
# Same table keyed by the prime minister's party, with *_pm column names
finnish_parties_expanded_pm <- rename(finnish_parties_expanded,
                                      Prime_Minister = party,
                                      lrgen_pm = lrgen,
                                      galtan_pm = galtan)

# NOTE(review): this is also a natural join. After the join above both tables
# carry `country`, which is not forward-filled (NA outside CHES survey years),
# so it takes part in the match - confirm this is intended.
senti_finland <- left_join(senti_finland, finnish_parties_expanded_pm)
senti_finland <- senti_finland %>% filter(!is.na(lrgen))
senti_finland$lrgen <- as.numeric(senti_finland$lrgen)
senti_finland$lrgen_pm <- as.numeric(senti_finland$lrgen_pm)
senti_finland$galtan <- as.numeric(senti_finland$galtan)
senti_finland$galtan_pm <- as.numeric(senti_finland$galtan_pm)
# Absolute distance between a party and the prime minister's party
senti_finland$lrgen_dist_to_pm <- abs(senti_finland$lrgen - senti_finland$lrgen_pm)
senti_finland$galtan_dist_to_pm <- abs(senti_finland$galtan - senti_finland$galtan_pm)
table(senti_finland$party, senti_finland$year)


###############################################
# Germany
###############################################

# Germany: attach CHES party positions (own party and prime minister's party)
# to the sentiment data and compute positional distances to the PM party.
# The .RData file is expected to provide `senti_germany`.
load("5_germany_senti.RData")
table(senti_germany$party)
german_parties <- ches %>%
  filter(country == "ge") %>%
  filter(party %in% c("CDU", "SPD", "FDP", "Grunen", "CSU", "PDS", "Linkspartei/PDS", "LINKE"))
# Fix spelling and merge CHES labels into the labels used in the sentiment data
german_parties$party <- recode(german_parties$party,
                               "Grunen" = "GRUENE",
                               "LINKE" = "PDS/LINKE",
                               "Linkspartei/PDS" = "PDS/LINKE",
                               "PDS" = "PDS/LINKE",
                               "CDU" = "CDU/CSU",
                               "CSU" = "CDU/CSU")
table(german_parties$party, german_parties$year)
# Several CHES parties can now share one label in the same year; average
# their positions so there is one row per label and year
german_parties <- german_parties %>%
  group_by(party, year) %>%
  summarise(lrgen = mean(as.numeric(lrgen)),
            galtan = mean(as.numeric(galtan))) %>%
  ungroup()
# summarise() dropped the country column; add it back
german_parties$country <- "de"
# One row per party and year (party-major order); built from however many
# distinct labels remain after recoding instead of a hard-coded count
german_parties_expanded <- data.frame(
  year = rep(1999:2017, times = length(unique(german_parties$party))),
  party = rep(unique(german_parties$party), each = length(1999:2017)),
  stringsAsFactors = FALSE
)
german_parties_expanded <- left_join(german_parties_expanded, german_parties,
                                     by = c("year", "party"))
# Add in positional estimates from CHES Dataset: carry the last observed
# value forward to later years. Filling is done within each party so that a
# party without a value in its first years never inherits the preceding
# party's position (and a missing first row cannot fail).
german_parties_expanded <- german_parties_expanded %>%
  group_by(party) %>%
  tidyr::fill(lrgen, galtan, .direction = "down") %>%
  ungroup() %>%
  as.data.frame()
# Calculate Sentiment: log ratio of the pos and neg columns; 0.5 is added to
# both so that zeros do not produce infinite values
senti_germany$Sentiment <- log((senti_germany$pos + 0.5) / (senti_germany$neg + 0.5))
senti_germany$year <- as.integer(format(as.Date(senti_germany$date, format = "%Y-%m-%d"), "%Y"))
# Natural join: matches on all columns the two tables share
senti_germany <- left_join(senti_germany, german_parties_expanded)
senti_germany <- senti_germany %>% filter(!is.na(lrgen))
# Same table keyed by the prime minister's party, with *_pm column names
german_parties_expanded_pm <- rename(german_parties_expanded,
                                     Prime_Minister = party,
                                     lrgen_pm = lrgen,
                                     galtan_pm = galtan)

# NOTE(review): this is also a natural join. After the join above both tables
# carry `country`, which is not forward-filled (NA outside CHES survey years),
# so it takes part in the match - confirm this is intended.
senti_germany <- left_join(senti_germany, german_parties_expanded_pm)
senti_germany <- senti_germany %>% filter(!is.na(lrgen))
senti_germany$lrgen <- as.numeric(senti_germany$lrgen)
senti_germany$lrgen_pm <- as.numeric(senti_germany$lrgen_pm)
senti_germany$galtan <- as.numeric(senti_germany$galtan)
senti_germany$galtan_pm <- as.numeric(senti_germany$galtan_pm)
# Absolute distance between a party and the prime minister's party
senti_germany$lrgen_dist_to_pm <- abs(senti_germany$lrgen - senti_germany$lrgen_pm)
senti_germany$galtan_dist_to_pm <- abs(senti_germany$galtan - senti_germany$galtan_pm)
table(senti_germany$party)

###############################################
# Czech Republic
###############################################
# Czech Republic: attach CHES party positions (own party and prime minister's
# party) to the sentiment data and compute positional distances to the PM party.
# The .RData file is expected to provide `senti_czech_republic`.
load("5_czech_republic_senti.RData")
# Calculate Sentiment: log ratio of the pos and neg columns; 0.5 is added to
# both so that zeros do not produce infinite values
senti_czech_republic$Sentiment <- log((senti_czech_republic$pos + 0.5) / (senti_czech_republic$neg + 0.5))
# Fix spelling
senti_czech_republic$party[senti_czech_republic$party == "TOP 09 a Starostove"] <- "TOP09"
czech_parties <- ches %>%
  filter(country == "cz") %>%
  filter(party %in% c("ANO2011", "CSSD", "KDU-CSL", "KSCM", "ODS", "SZ", "TOP09", "US-DEU", "USVIT", "VV"))
table(czech_parties$party, czech_parties$year)
table(senti_czech_republic$party)
# Map CHES party labels onto the labels used in the sentiment data
czech_parties$party <- recode(czech_parties$party,
                              "ANO2011" = "ANO",
                              "USVIT" = "Usvit")
table(czech_parties$party, czech_parties$year)

# One row per party and year (party-major order, grid starts in 2002); built
# from however many parties the filter returned instead of a hard-coded count
czech_parties_expanded <- data.frame(
  year = rep(2002:2017, times = length(unique(czech_parties$party))),
  party = rep(unique(czech_parties$party), each = length(2002:2017)),
  stringsAsFactors = FALSE
)
czech_parties_expanded <- left_join(czech_parties_expanded, czech_parties,
                                    by = c("year", "party"))

# Add in positional estimates from CHES Dataset: carry the last observed
# value forward to later years. Filling is done within each party so that a
# party without a value in its first years never inherits the preceding
# party's position (and a missing first row cannot fail).
czech_parties_expanded <- czech_parties_expanded %>%
  group_by(party) %>%
  tidyr::fill(lrgen, galtan, .direction = "down") %>%
  ungroup() %>%
  as.data.frame()
# No positions before these years for these parties (kept explicit)
czech_parties_expanded$lrgen[czech_parties_expanded$party == "Usvit" & czech_parties_expanded$year < 2014] <- NA
czech_parties_expanded$galtan[czech_parties_expanded$party == "Usvit" & czech_parties_expanded$year < 2014] <- NA
czech_parties_expanded$lrgen[czech_parties_expanded$party == "ANO" & czech_parties_expanded$year < 2014] <- NA
czech_parties_expanded$galtan[czech_parties_expanded$party == "ANO" & czech_parties_expanded$year < 2014] <- NA
czech_parties_expanded$lrgen[czech_parties_expanded$party == "VV" & czech_parties_expanded$year < 2010] <- NA
czech_parties_expanded$galtan[czech_parties_expanded$party == "VV" & czech_parties_expanded$year < 2010] <- NA
czech_parties_expanded$lrgen[czech_parties_expanded$party == "TOP09" & czech_parties_expanded$year < 2010] <- NA
czech_parties_expanded$galtan[czech_parties_expanded$party == "TOP09" & czech_parties_expanded$year < 2010] <- NA
czech_parties_expanded$lrgen[czech_parties_expanded$party == "SZ" & czech_parties_expanded$year < 2006] <- NA
czech_parties_expanded$galtan[czech_parties_expanded$party == "SZ" & czech_parties_expanded$year < 2006] <- NA

senti_czech_republic$year <- as.integer(format(as.Date(senti_czech_republic$date, format = "%Y-%m-%d"), "%Y"))
str(senti_czech_republic)
str(czech_parties_expanded)
# Natural join: matches on all columns the two tables share
senti_czech_republic <- left_join(senti_czech_republic, czech_parties_expanded)
senti_czech_republic <- senti_czech_republic %>% filter(!is.na(lrgen))
# Same table keyed by the prime minister's party, with *_pm column names
czech_parties_expanded_pm <- rename(czech_parties_expanded,
                                    Prime_Minister = party,
                                    lrgen_pm = lrgen,
                                    galtan_pm = galtan)

# NOTE(review): this is also a natural join. After the join above both tables
# carry `country`, which is not forward-filled (NA outside CHES survey years),
# so it takes part in the match - confirm this is intended.
senti_czech_republic <- left_join(senti_czech_republic, czech_parties_expanded_pm)
senti_czech_republic <- senti_czech_republic %>% filter(!is.na(lrgen))
senti_czech_republic$lrgen <- as.numeric(senti_czech_republic$lrgen)
senti_czech_republic$lrgen_pm <- as.numeric(senti_czech_republic$lrgen_pm)
senti_czech_republic$galtan <- as.numeric(senti_czech_republic$galtan)
senti_czech_republic$galtan_pm <- as.numeric(senti_czech_republic$galtan_pm)
# Absolute distance between a party and the prime minister's party
senti_czech_republic$lrgen_dist_to_pm <- abs(senti_czech_republic$lrgen - senti_czech_republic$lrgen_pm)
senti_czech_republic$galtan_dist_to_pm <- abs(senti_czech_republic$galtan - senti_czech_republic$galtan_pm)
table(senti_czech_republic$party, senti_czech_republic$year)
table(senti_czech_republic$date)
###############################################
# Sweden
###############################################

# Sweden: attach CHES party positions (own party and prime minister's party)
# to the sentiment data and compute positional distances to the PM party.
# The .RData file is expected to provide `senti_sweden`.
load("5_sweden_senti.RData")
# Calculate Sentiment: log ratio of the pos and neg columns; 0.5 is added to
# both so that zeros do not produce infinite values
senti_sweden$Sentiment <- log((senti_sweden$pos + 0.5) / (senti_sweden$neg + 0.5))
swedish_parties <- ches %>%
  filter(country == "sv") %>%
  filter(party %in% c("C", "FP", "KD", "MP", "M", "NyD", "SAP", "SD", "V"))
table(swedish_parties$party, swedish_parties$year)
table(senti_sweden$party)
# Map CHES party labels onto the labels used in the sentiment data
swedish_parties$party <- recode(swedish_parties$party,
                                "M" = "MSP",
                                "NyD" = "NYD")
table(swedish_parties$party, swedish_parties$year)

# One row per party and year (party-major order); built from however many
# parties the filter returned instead of a hard-coded count
swedish_parties_expanded <- data.frame(
  year = rep(1999:2017, times = length(unique(swedish_parties$party))),
  party = rep(unique(swedish_parties$party), each = length(1999:2017)),
  stringsAsFactors = FALSE
)
swedish_parties_expanded <- left_join(swedish_parties_expanded, swedish_parties,
                                      by = c("year", "party"))

# Add in positional estimates from CHES Dataset: carry the last observed
# value forward to later years. Filling is done within each party so that a
# party without a value in its first years never inherits the preceding
# party's position (and a missing first row cannot fail).
swedish_parties_expanded <- swedish_parties_expanded %>%
  group_by(party) %>%
  tidyr::fill(lrgen, galtan, .direction = "down") %>%
  ungroup() %>%
  as.data.frame()
# No positions before this year for this party (kept explicit)
swedish_parties_expanded$lrgen[swedish_parties_expanded$party == "SD" & swedish_parties_expanded$year < 2010] <- NA
swedish_parties_expanded$galtan[swedish_parties_expanded$party == "SD" & swedish_parties_expanded$year < 2010] <- NA

senti_sweden$year <- as.integer(format(as.Date(senti_sweden$date, format = "%Y-%m-%d"), "%Y"))
str(senti_sweden)
str(swedish_parties_expanded)
# Natural join: matches on all columns the two tables share
senti_sweden <- left_join(senti_sweden, swedish_parties_expanded)
senti_sweden <- senti_sweden %>% filter(!is.na(lrgen))
# Same table keyed by the prime minister's party, with *_pm column names
swedish_parties_expanded_pm <- rename(swedish_parties_expanded,
                                      Prime_Minister = party,
                                      lrgen_pm = lrgen,
                                      galtan_pm = galtan)

# NOTE(review): this is also a natural join. After the join above both tables
# carry `country`, which is not forward-filled (NA outside CHES survey years),
# so it takes part in the match - confirm this is intended.
senti_sweden <- left_join(senti_sweden, swedish_parties_expanded_pm)
senti_sweden <- senti_sweden %>% filter(!is.na(lrgen))
senti_sweden$lrgen <- as.numeric(senti_sweden$lrgen)
senti_sweden$lrgen_pm <- as.numeric(senti_sweden$lrgen_pm)
senti_sweden$galtan <- as.numeric(senti_sweden$galtan)
senti_sweden$galtan_pm <- as.numeric(senti_sweden$galtan_pm)
# Absolute distance between a party and the prime minister's party
senti_sweden$lrgen_dist_to_pm <- abs(senti_sweden$lrgen - senti_sweden$lrgen_pm)
senti_sweden$galtan_dist_to_pm <- abs(senti_sweden$galtan - senti_sweden$galtan_pm)
table(senti_sweden$party, senti_sweden$year)

###############################################
# Netherlands
###############################################

# Netherlands: attach CHES party positions (own party and prime minister's
# party) to the sentiment data and compute positional distances to the PM party.
# The .RData file is expected to provide `senti_netherlands`.
load("5_netherlands_senti.RData")
# Calculate Sentiment: log ratio of the pos and neg columns; 0.5 is added to
# both so that zeros do not produce infinite values
senti_netherlands$Sentiment <- log((senti_netherlands$pos + 0.5) / (senti_netherlands$neg + 0.5))
dutch_parties <- ches %>%
  filter(country == "nl") %>%
  filter(party %in% c("CDA", "CU", "D66", "GL", "GPV", "LPF", "PvdA", "PvdD", "PVV", "RPF", "SGP", "SP", "VVD"))
table(dutch_parties$party, dutch_parties$year)
table(senti_netherlands$party)
table(dutch_parties$party, dutch_parties$year)

# One row per party and year (party-major order); built from however many
# parties the filter returned instead of a hard-coded count
dutch_parties_expanded <- data.frame(
  year = rep(1999:2017, times = length(unique(dutch_parties$party))),
  party = rep(unique(dutch_parties$party), each = length(1999:2017)),
  stringsAsFactors = FALSE
)
dutch_parties_expanded <- left_join(dutch_parties_expanded, dutch_parties,
                                    by = c("year", "party"))

# Add in positional estimates from CHES Dataset: carry the last observed
# value forward to later years. Filling is done within each party so that a
# party without a value in its first years never inherits the preceding
# party's position (and a missing first row cannot fail).
dutch_parties_expanded <- dutch_parties_expanded %>%
  group_by(party) %>%
  tidyr::fill(lrgen, galtan, .direction = "down") %>%
  ungroup() %>%
  as.data.frame()
# No positions before these years for these parties (kept explicit)
dutch_parties_expanded$lrgen[dutch_parties_expanded$party == "LPF" & dutch_parties_expanded$year < 2002] <- NA
dutch_parties_expanded$galtan[dutch_parties_expanded$party == "LPF" & dutch_parties_expanded$year < 2002] <- NA
dutch_parties_expanded$lrgen[dutch_parties_expanded$party == "PVV" & dutch_parties_expanded$year < 2006] <- NA
dutch_parties_expanded$galtan[dutch_parties_expanded$party == "PVV" & dutch_parties_expanded$year < 2006] <- NA
dutch_parties_expanded$lrgen[dutch_parties_expanded$party == "PvdD" & dutch_parties_expanded$year < 2010] <- NA
dutch_parties_expanded$galtan[dutch_parties_expanded$party == "PvdD" & dutch_parties_expanded$year < 2010] <- NA

senti_netherlands$year <- as.integer(format(as.Date(senti_netherlands$date, format = "%Y-%m-%d"), "%Y"))
str(senti_netherlands)
str(dutch_parties_expanded)
# Natural join: matches on all columns the two tables share
senti_netherlands <- left_join(senti_netherlands, dutch_parties_expanded)
senti_netherlands <- senti_netherlands %>% filter(!is.na(lrgen))
# Same table keyed by the prime minister's party, with *_pm column names
dutch_parties_expanded_pm <- rename(dutch_parties_expanded,
                                    Prime_Minister = party,
                                    lrgen_pm = lrgen,
                                    galtan_pm = galtan)

# NOTE(review): this is also a natural join. After the join above both tables
# carry `country`, which is not forward-filled (NA outside CHES survey years),
# so it takes part in the match - confirm this is intended.
senti_netherlands <- left_join(senti_netherlands, dutch_parties_expanded_pm)
senti_netherlands <- senti_netherlands %>% filter(!is.na(lrgen))
senti_netherlands$lrgen <- as.numeric(senti_netherlands$lrgen)
senti_netherlands$lrgen_pm <- as.numeric(senti_netherlands$lrgen_pm)
senti_netherlands$galtan <- as.numeric(senti_netherlands$galtan)
senti_netherlands$galtan_pm <- as.numeric(senti_netherlands$galtan_pm)
# Absolute distance between a party and the prime minister's party
senti_netherlands$lrgen_dist_to_pm <- abs(senti_netherlands$lrgen - senti_netherlands$lrgen_pm)
senti_netherlands$galtan_dist_to_pm <- abs(senti_netherlands$galtan - senti_netherlands$galtan_pm)
table(senti_netherlands$party, senti_netherlands$year)

###############################################
# United Kingdom
###############################################

# United Kingdom: attach CHES party positions (own party and prime minister's
# party) to the sentiment data and compute positional distances to the PM party.
# The .RData file is expected to provide `senti_uk`.
load("5_uk_senti.RData")
# Calculate Sentiment: the UK file names the two columns pos.pos / neg.neg,
# so rename them first; 0.5 is added to both so that zeros do not produce
# infinite values
senti_uk <- senti_uk %>% rename(pos = pos.pos)
senti_uk <- senti_uk %>% rename(neg = neg.neg)
senti_uk$Sentiment <- log((senti_uk$pos + 0.5) / (senti_uk$neg + 0.5))
uk_parties <- ches %>%
  filter(country == "uk") %>%
  filter(party %in% c("CONS", "GREEN", "LAB", "LibDem", "PLAID", "SNP", "UKIP"))
table(uk_parties$party, uk_parties$year)
table(senti_uk$party)
# Map CHES party labels onto the labels used in the sentiment data
uk_parties$party <- recode(uk_parties$party,
                           "CONS" = "Con",
                           "LAB" = "Lab",
                           "PLAID" = "PlaidCymru",
                           "GREEN" = "GPEW")
table(uk_parties$party, uk_parties$year)

# One row per party and year (party-major order); built from however many
# parties the filter returned instead of a hard-coded count
uk_parties_expanded <- data.frame(
  year = rep(1999:2017, times = length(unique(uk_parties$party))),
  party = rep(unique(uk_parties$party), each = length(1999:2017)),
  stringsAsFactors = FALSE
)
uk_parties_expanded <- left_join(uk_parties_expanded, uk_parties,
                                 by = c("year", "party"))

# Add in positional estimates from CHES Dataset: carry the last observed
# value forward to later years. Filling is done within each party so that a
# party without a value in its first years never inherits the preceding
# party's position (and a missing first row cannot fail).
uk_parties_expanded <- uk_parties_expanded %>%
  group_by(party) %>%
  tidyr::fill(lrgen, galtan, .direction = "down") %>%
  ungroup() %>%
  as.data.frame()

senti_uk$year <- as.integer(format(as.Date(senti_uk$date, format = "%Y-%m-%d"), "%Y"))
str(senti_uk)
str(uk_parties_expanded)
# Natural join: matches on all columns the two tables share
senti_uk <- left_join(senti_uk, uk_parties_expanded)
senti_uk <- senti_uk %>% filter(!is.na(lrgen))
# Same table keyed by the prime minister's party, with *_pm column names
uk_parties_expanded_pm <- rename(uk_parties_expanded,
                                 Prime_Minister = party,
                                 lrgen_pm = lrgen,
                                 galtan_pm = galtan)

# NOTE(review): this is also a natural join. After the join above both tables
# carry `country`, which is not forward-filled (NA outside CHES survey years),
# so it takes part in the match - confirm this is intended.
senti_uk <- left_join(senti_uk, uk_parties_expanded_pm)
senti_uk <- senti_uk %>% filter(!is.na(lrgen))
senti_uk$lrgen <- as.numeric(senti_uk$lrgen)
senti_uk$lrgen_pm <- as.numeric(senti_uk$lrgen_pm)
senti_uk$galtan <- as.numeric(senti_uk$galtan)
senti_uk$galtan_pm <- as.numeric(senti_uk$galtan_pm)
# Absolute distance between a party and the prime minister's party
senti_uk$lrgen_dist_to_pm <- abs(senti_uk$lrgen - senti_uk$lrgen_pm)
senti_uk$galtan_dist_to_pm <- abs(senti_uk$galtan - senti_uk$galtan_pm)
table(senti_uk$party, senti_uk$year)


###############################################
# Combine all legislatures
###############################################

# Stack the seven country data sets (rbind needs identical columns in each)
senti_role_all <- rbind(
  senti_czech_republic, senti_finland, senti_germany,
  senti_netherlands, senti_spain, senti_sweden, senti_uk
)

# Aggregate at party level: mean Sentiment for every observed combination of
# the right-hand-side variables. The formula interface drops rows with a
# missing value in any of them.
party_level_senti <- aggregate(
  Sentiment ~ date + party + country_name + galtan_dist_to_pm + lrgen_dist_to_pm +
    last_year_before_election + Role + marginality,
  data = senti_role_all,
  FUN = mean,
  na.rm = TRUE
)

# Panel unit: party within country
party_level_senti$party_country <- with(
  party_level_senti,
  paste(party, country_name, sep = "_")
)

# Opposition is the reference category for Role
party_level_senti$Role <- relevel(factor(party_level_senti$Role), ref = "Opposition")
# Indicator: 1 when marginality is at most 0.5, otherwise 0
party_level_senti$Minority_gov <- as.numeric(party_level_senti$marginality <= 0.5)
# Create .pdata frame for fixed effects model
party_level_senti.pdata <- pdata.frame(party_level_senti, index = "party_country")
# Run fixed effects model with party fixed effects
mod.party_level_senti <- plm(
  Sentiment ~ (last_year_before_election + lrgen_dist_to_pm + Minority_gov) * Role,
  data = party_level_senti.pdata,
  model = "within",
  effect = "individual"
)

# Text table with confidence intervals, then the default (LaTeX) table
stargazer(mod.party_level_senti, ci = TRUE, type = "text")
stargazer(mod.party_level_senti)

# Extract coefficients for plotting: one row per estimated coefficient with
# its point estimate and a 95% interval (estimate +/- 1.96 standard errors).
s <- summary(mod.party_level_senti)

# Column 1 of the coefficient table holds the estimates, column 2 the
# standard errors.
model.senti.results <- data.frame(
  variable = row.names(s$coefficients),
  coef = s$coefficients[, 1],
  ci.low = s$coefficients[, 1] - 1.96 * s$coefficients[, 2],
  ci.up = s$coefficients[, 1] + 1.96 * s$coefficients[, 2],
  row.names = NULL,
  stringsAsFactors = FALSE
)
model.senti.results

# Distribution of the minority government indicator.
table(party_level_senti$Minority_gov)

# Human-readable coefficient labels.
# NOTE(review): these are attached by position, so their order must match the
# row order of the model's coefficient table -- confirm against the printed
# model.senti.results whenever the model specification changes.
coefficient_labels <- c(
  "Last Year Before an Election",
  "Left-Right distance to prime minister party",
  "Minority Government",
  "Junior Coalition Partner",
  "Prime Minister Party",
  "Last Year Before an Election X Junior Coalition Partner",
  "Last Year Before an Election X Prime Minister Party",
  "Left-Right distance to prime minister party X Junior Coalition Partner",
  "Minority Government X Junior Coalition Partner",
  "Minority Government X Prime Minister Party"
)

# Factor levels are listed in the desired display order and then reversed.
model.senti.results$labels <- factor(
  coefficient_labels,
  levels = rev(c(
    "Prime Minister Party",
    "Junior Coalition Partner",
    "Left-Right distance to prime minister party",
    "Minority Government",
    "Last Year Before an Election",
    "Last Year Before an Election X Prime Minister Party",
    "Last Year Before an Election X Junior Coalition Partner",
    "Left-Right distance to prime minister party X Junior Coalition Partner",
    "Minority Government X Junior Coalition Partner",
    "Minority Government X Prime Minister Party"
  ))
)

# Coefficient plot: point estimates with 95% intervals and a dashed reference
# line at zero, drawn with flipped coordinates so labels run down the side.
p.senti <- ggplot(model.senti.results) +
  theme_bw() +
  geom_pointrange(
    aes(x = labels, y = coef, ymin = ci.low, ymax = ci.up),
    lwd = 0.5,
    position = position_dodge(width = 0.5),
    shape = 21,
    fill = "black"
  ) +
  geom_hline(yintercept = 0, colour = gray(0.5), lty = 2) +
  coord_flip() +
  labs(
    title = "",
    x = "",
    y = "Coefficients and 95% confidence intervals"
  ) +
  theme(
    text = element_text(size = 16),
    axis.title.x = element_text(size = 12, vjust = -0.5)
  )


###############################################
# Overall Sentiment in the Legislatures
###############################################

# Mean sentiment for every country x parliamentary role combination.
# The result has one row per combination with columns Country, Role and
# Avg_Senti; combinations whose mean is missing are dropped.
# Everything is computed inside local() so no helper objects are left behind
# in the workspace.
mean_senti_overall <- local({
  country_names <- c(
    "Czech Republic", "Finland", "Germany", "Netherlands",
    "Spain", "Sweden", "United Kingdom"
  )
  role_names <- c(
    "Prime Minister Party", "Junior Coalition Partner", "Opposition"
  )

  # Country-major grid: the three roles are listed within each country.
  cells <- data.frame(
    Country = rep(country_names, each = length(role_names)),
    Role = rep(role_names, times = length(country_names))
  )

  # One pass over the grid instead of one hand-written assignment per cell.
  # The mean is taken over all rows of senti_role_all matching the cell's
  # role and country; an empty selection yields NaN, which is removed below.
  cells$Avg_Senti <- vapply(
    seq_len(nrow(cells)),
    function(i) {
      in_cell <- senti_role_all$Role == as.character(cells$Role[i]) &
        senti_role_all$country_name == as.character(cells$Country[i])
      mean(senti_role_all$Sentiment[in_cell])
    },
    numeric(1)
  )

  # No Junior Coalition Partner mean is reported for Spain; keep that cell
  # missing so the resulting table is the same as before.
  spain_junior <- cells$Country == "Spain" &
    cells$Role == "Junior Coalition Partner"
  cells$Avg_Senti[spain_junior] <- NA

  cells %>% filter(!is.na(Avg_Senti))
})

# Opposition becomes the first factor level, so it comes first in the legend
# and receives the first shape listed in scale_shape_manual() below.
mean_senti_overall$Role <- relevel(
  factor(mean_senti_overall$Role),
  ref = "Opposition"
)

# Dot plot of average sentiment by country; countries are ordered by their
# average sentiment and roles are distinguished by both colour and shape.
pointplot_countries_senti <- ggplot(
  mean_senti_overall,
  aes(
    x = Avg_Senti,
    y = reorder(Country, Avg_Senti),
    colour = Role,
    shape = Role
  )
) +
  geom_point(size = 8) +
  theme_bw() +
  theme(
    # Only dashed horizontal guide lines, one per country.
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(
      size = 1, colour = "grey88", linetype = "dashed"
    ),
    axis.text = element_text(size = 18, colour = "black"),
    axis.title = element_text(size = 18),
    # Boxed legend without a title.
    legend.background = element_rect(
      color = "black", fill = "white", size = 0.5, linetype = "solid"
    ),
    legend.text = element_text(size = 14),
    legend.title = element_blank()
  ) +
  labs(x = "Sentiment", y = "") +
  scale_shape_manual(values = c(16, 18, 15))


